Explains how to generate an interactive heatmap from a SingleCellExperiment object Includes a summarizeHeatmap function that calculates the median counts by channel and by cluster (or any other variable in colData(sce)).
For more information about heatmaply, see the heatmaply vignette and the original heatmaply publication.
SingleCellExperiment, heatmaply, data.table.rownames(sce).library(SingleCellExperiment)
library(heatmaply)
Contains a subset of the pancreas dataset
fn.sce <- file.path('..', '..', '..', 'data', 'pancreas_example_sce.rds')
sce <- readRDS(fn.sce)
sce
## class: SingleCellExperiment
## dim: 38 12274
## metadata(0):
## assays(1): counts
## rownames(38): H3 SMA ... Ir191 Ir193
## rowData names(3): channel metal target
## colnames(12274): E34_1 E34_2 ... J02_4952 J02_4953
## colData names(21): slide id ... Age Gender
## reducedDimNames(0):
## spikeNames(0):
## altExpNames(0):
Returns median counts by cluster and by channel. The arguments are the following:
sce = A SingleCellExperiment object.
expr_values = A string corresponding to an assay in the sce, should be in assayNames(sce).
cluster_by = Name of the column containing the clusters.
channels = channels to include, should be in rownames(sce). If NULL, all channels will be summarized.
FUN = c(“median”,“mean”) chose if the median or the mean should be returned (default:“median”) (optional).
summarizeHeatmap <- function(sce, expr_values, cluster_by, channels=NULL, FUN="median"){
require(data.table)
# Argument checks
if(is.null(expr_values) | !(expr_values %in% assayNames(sce))){
expr_values <- assayNames(sce)[1]
print(paste0("Warning: Assay type not provided or assay type not in 'assayNames(sce)', '", expr_values, "' used."))
}
if(is.null(channels)){
channels <- rownames(sce)
}
if(!all(channels %in% rownames(sce))){
stop("Channel names do not correspond to the rownames of the assay")
}
if(is.null(cluster_by)){
stop("Cluster column not provided")
}
if(!(cluster_by %in% colnames(colData(sce)))){
stop("The 'cluster_by' argument should correspond to a colData(sce) column")
}
if(length(cluster_by) > 1){
stop("'cluster_by' takes only one argument")
}
if(length(expr_values) > 1){
stop("'expr_values' takes only one argument")
}
if(!(FUN %in% c("median", "mean"))){
stop("'FUN' takes either 'median' or 'mean' as an argument")
}
# Convert the data to a melted format
dat <- as.data.table(t(assay(sce, expr_values)[channels,]))
dat[, id := colnames(sce)]
dat[, cluster := colData(sce)[, cluster_by]]
dat <- melt.data.table(dat, id.vars=c('id','cluster'), variable.name='channel', value.name=expr_values)
# Summarize the data
if(FUN == "median"){
dat.summary <- dat[, list(
summarized.val = median(get(expr_values)),
cellspercluster = .N),
by = c('channel', 'cluster')]
} else if (FUN == "mean"){
dat.summary <- dat[, list(
summarized.val = mean(get(expr_values)),
cellspercluster = .N),
by = c('channel', 'cluster')]
}
# Decast the summarized data and convert to a matrix
hm.cell <- dcast.data.table(dat.summary, formula='cluster ~ channel', value.var='summarized.val')
hm.clusters <- hm.cell$cluster
hm.cell <- as.matrix(hm.cell[, -1, with=FALSE])
# Add rownames
rownames(hm.cell) <- hm.clusters
# Return the values
return(as.matrix(hm.cell))
}
summarizeHeatmap function# Select the relevant channels
good.channels <- rownames(sce)[! rownames(sce) %in% c('H3', 'PD-1', 'cPARP', 'Ir191', 'Ir193')]
hm <- summarizeHeatmap(sce,
expr_values = 'counts',
cluster = 'CellType',
channels = good.channels,
FUN = "mean")
Displays normalized, scaled or percentized counts per cluster and channel
Normalize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#normalize
Scale: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#scale
Percentize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#percentize
# Normalize
heatmaply(heatmaply::normalize(hm))
# Scale
heatmaply(hm, scale="column")
# Percentize
heatmaply(heatmaply::percentize(hm))
# Summarize the data
hm <- summarizeHeatmap(sce,
expr_values = 'counts',
cluster = 'CellType',
channels = good.channels)
# Caculate transformed counts
hm.exprs <- asinh(hm / 1)
# Plot the heatmap
heatmaply(heatmaply::normalize(hm.exprs))
Any other variable in colData(sce) can be passed to summarizeHeatmap.
Example with T1D stage (stage).
# Summarize the data
hm.stages <- summarizeHeatmap(sce,
expr_values = 'counts',
cluster = 'stage',
channels = good.channels,
FUN = 'mean')
# Plot the heatmap
heatmaply(hm.stages, scale="column")
Here, row side colors are defined in function of the cell types and cell categories (defined in colData(sce)$CellType and colData(sce)$CellCat).
# Get cell types and cell categories
rsc <- unique(data.frame(`Cell type`= colData(sce)$CellType,
`Cell category` = colData(sce)$CellCat))
# Plot the heatmap
heatmaply(heatmaply::normalize(hm), RowSideColors = rsc)
Displays correlation between channels
heatmaply_cor(cor(hm), distfun="spearman")
https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#correlation-heatmaps
r <- cor(hm)
cor.test.p <- function(x){
FUN <- function(x, y) cor.test(x, y)[["p.value"]]
z <- outer(
colnames(x),
colnames(x),
Vectorize(function(i,j) FUN(x[,i], x[,j]))
)
dimnames(z) <- list(colnames(x), colnames(x))
z
}
p <- cor.test.p(hm)
heatmaply_cor(
r,
distfun = "spearman",
node_type = "scatter",
point_size_mat = -log10(p),
point_size_name = "-log10(p-value)",
label_names = c("x", "y", "Correlation")
)
ggheatmap(heatmaply::normalize(hm))
The file argument can be provided to save the heatmap as an html file
# Define the name of the output html file
fn.hm <- file.path('./', "saved-heatmap.html")
# Save the heatmap to html
heatmaply(heatmaply::normalize(hm), file=fn.hm)